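# NOTE(review): the coordinates recorded here (36.17, -115.14) are those of
# Las Vegas, NV; Chicago is near 41.88, -87.63. Verify which site chicago.csv covers.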
# Read the Chicago irradiance CSV into a data.table and preview the first rows.
chicago_full <- fread(file = "data/Chicago_Solar_Irradiation/chicago.csv")
head(chicago_full, n = 6L)
## Year Month Day Hour Minute Dew Point Surface Albedo Wind Speed
## 1: 2000 1 1 0 0 0 0.072 7.5
## 2: 2000 1 1 0 30 0 0.072 7.7
## 3: 2000 1 1 1 0 0 0.072 7.9
## 4: 2000 1 1 1 30 0 0.072 7.8
## 5: 2000 1 1 2 0 0 0.072 7.8
## 6: 2000 1 1 2 30 0 0.072 7.7
## Relative Humidity Temperature Pressure GHI Solar Zenith Angle Cloud Type
## 1: 100 0 990 0 161 1
## 2: 100 0 990 0 160 1
## 3: 100 0 990 0 157 1
## 4: 100 0 990 0 152 1
## 5: 100 0 990 0 148 1
## 6: 100 0 990 0 142 1
## Precipitable Water Wind Direction
## 1: 0.738 201
## 2: 0.746 201
## 3: 0.754 203
## 4: 0.765 203
## 5: 0.777 205
## 6: 0.796 205
# Show the column types and the first few values of every column.
utils::str(chicago_full)
## Classes 'data.table' and 'data.frame': 367920 obs. of 16 variables:
## $ Year : int 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## $ Month : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Day : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Hour : int 0 0 1 1 2 2 3 3 4 4 ...
## $ Minute : int 0 30 0 30 0 30 0 30 0 30 ...
## $ Dew Point : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Surface Albedo : num 0.072 0.072 0.072 0.072 0.072 0.072 0.072 0.072 0.072 0.072 ...
## $ Wind Speed : num 7.5 7.7 7.9 7.8 7.8 7.7 7.6 7.5 7.4 7.2 ...
## $ Relative Humidity : num 100 100 100 100 100 ...
## $ Temperature : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Pressure : int 990 990 990 990 990 990 990 990 990 990 ...
## $ GHI : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Solar Zenith Angle: num 161 160 157 152 148 ...
## $ Cloud Type : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Precipitable Water: num 0.738 0.746 0.754 0.765 0.777 0.796 0.816 0.846 0.877 0.911 ...
## $ Wind Direction : num 201 201 203 203 205 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Combine the separate Year/Month/Day/Hour/Minute columns into one timestamp.
chicago_full$Date <- with(
  chicago_full,
  make_datetime(year = Year, month = Month, day = Day, hour = Hour, min = Minute)
)
# Full temperature time series across the whole record.
ggplot(chicago_full, aes(x = Date, y = Temperature)) +
  geom_line(colour = "blue")

# Monthly-mean line chart for one column, with one coloured line per year.
#
# data:   data frame holding numeric `Year` and `Month` columns plus `column`.
# column: name (character string) of the numeric column to average.
# title:  plot title; defaults to "Average <column> by Month".
#
# Returns a ggplot object. Averages are taken per (Year, Month) with NAs
# removed, and the x axis is labelled with abbreviated month names.
plot_monthly_mean <- function(data, column,
                              title = paste("Average", column, "by Month")) {
  data %>%
    group_by(Year, Month) %>%
    summarise(value = mean(.data[[column]], na.rm = TRUE), .groups = "drop") %>%
    ggplot() +
    geom_line(aes(x = Month, y = value, color = factor(Year))) +
    scale_x_continuous(breaks = 1:12, labels = month.abb, minor_breaks = NULL) +
    # The summarised column is called `value`; restore the original axis label.
    labs(title = title, y = column, colour = "Year")
}

# One overview chart per variable (each shows every year as its own line).
temp <- plot_monthly_mean(chicago_full, "Temperature")
dew <- plot_monthly_mean(chicago_full, "Dew Point")
humidity <- plot_monthly_mean(chicago_full, "Relative Humidity")
albedo <- plot_monthly_mean(chicago_full, "Surface Albedo")
wind <- plot_monthly_mean(chicago_full, "Wind Speed")
pressure <- plot_monthly_mean(chicago_full, "Pressure")
# NOTE(review): wind direction is a circular quantity, so its arithmetic mean
# can be misleading (e.g. 350 and 10 degrees average to 180). Kept as-is.
windD <- plot_monthly_mean(chicago_full, "Wind Direction")
precip <- plot_monthly_mean(chicago_full, "Precipitable Water")
ghi <- plot_monthly_mean(chicago_full, "GHI")

# Distribution plots for the two variables that are not averaged by month.
zenith <- chicago_full %>%
  ggplot(aes(x = `Solar Zenith Angle`)) +
  geom_histogram(bins = 360) +
  scale_x_continuous(breaks = seq(0, 360, 30)) +
  labs(title = "Solar Zenith Angle Histogram")
cloud <- chicago_full %>%
  ggplot(aes(x = `Cloud Type`)) +
  geom_histogram(bins = 11) +
  scale_x_continuous(breaks = seq(0, 10, 1)) +
  labs(title = "Cloud Type")
# Scatter plot of monthly-mean GHI against the monthly mean of another column.
#
# data:   data frame holding `Year`, `Month`, `GHI` and `column`.
# column: name (character string) of the numeric column for the x axis.
#
# Returns a ggplot object with one point per (Year, Month). `.groups = "drop"`
# keeps summarise() from emitting its "grouped output" message.
plot_ghi_vs_monthly_mean <- function(data, column) {
  data %>%
    group_by(Year, Month) %>%
    summarise(
      value = mean(.data[[column]], na.rm = TRUE),
      GHI = mean(GHI, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    ggplot(aes(x = value, y = GHI)) +
    geom_point() +
    # The summarised column is called `value`; restore the original axis label.
    labs(x = column)
}

vsTemp <- plot_ghi_vs_monthly_mean(chicago_full, "Temperature")
vsDew <- plot_ghi_vs_monthly_mean(chicago_full, "Dew Point")
vsHumid <- plot_ghi_vs_monthly_mean(chicago_full, "Relative Humidity")
vsAlbedo <- plot_ghi_vs_monthly_mean(chicago_full, "Surface Albedo")
vsWindS <- plot_ghi_vs_monthly_mean(chicago_full, "Wind Speed")
vsPressure <- plot_ghi_vs_monthly_mean(chicago_full, "Pressure")
# vsWindD was previously defined twice with identical code; once is enough.
vsWindD <- plot_ghi_vs_monthly_mean(chicago_full, "Wind Direction")
vsPrecip <- plot_ghi_vs_monthly_mean(chicago_full, "Precipitable Water")

# Mean GHI for every distinct solar zenith angle value.
vsZenith <- chicago_full %>%
  group_by(`Solar Zenith Angle`) %>%
  summarise(average_ghi = mean(GHI, na.rm = TRUE)) %>%
  ggplot(aes(x = `Solar Zenith Angle`, y = average_ghi)) +
  geom_point()
# Mean GHI for every cloud type code.
vsCloud <- chicago_full %>%
  group_by(`Cloud Type`) %>%
  summarise(average_ghi = mean(GHI, na.rm = TRUE)) %>%
  ggplot(aes(x = `Cloud Type`, y = average_ghi)) +
  geom_point()
# Gather the per-variable overview plots and print them in order.
plots <- list(
  temp, dew, humidity, albedo, wind, pressure,
  windD, precip, zenith, ghi, cloud
)
plots
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

##
## [[11]]

# Gather the GHI-versus-variable plots and print them in order.
plotsVs <- list(
  vsTemp, vsDew, vsHumid, vsAlbedo, vsWindS,
  vsPressure, vsWindD, vsPrecip, vsZenith, vsCloud
)
plotsVs
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

# Per-column summary statistics (min, quartiles, mean, max).
summary(object = chicago_full)
## Year Month Day Hour Minute
## Min. :2000 Min. : 1.00 Min. : 1.0 Min. : 0.00 Min. : 0
## 1st Qu.:2005 1st Qu.: 4.00 1st Qu.: 8.0 1st Qu.: 5.75 1st Qu.: 0
## Median :2010 Median : 7.00 Median :16.0 Median :11.50 Median :15
## Mean :2010 Mean : 6.53 Mean :15.7 Mean :11.50 Mean :15
## 3rd Qu.:2015 3rd Qu.:10.00 3rd Qu.:23.0 3rd Qu.:17.25 3rd Qu.:30
## Max. :2020 Max. :12.00 Max. :31.0 Max. :23.00 Max. :30
## Dew Point Surface Albedo Wind Speed Relative Humidity
## Min. :-26.90 Min. :0.053 Min. : 0.0 Min. : 31.4
## 1st Qu.: -1.00 1st Qu.:0.068 1st Qu.: 2.7 1st Qu.: 73.0
## Median : 6.00 Median :0.075 Median : 4.1 Median : 83.8
## Mean : 6.23 Mean :0.194 Mean : 4.4 Mean : 82.1
## 3rd Qu.: 14.50 3rd Qu.:0.100 3rd Qu.: 5.8 3rd Qu.: 93.4
## Max. : 26.30 Max. :0.870 Max. :15.9 Max. :100.0
## Temperature Pressure GHI Solar Zenith Angle
## Min. :-26.9 Min. : 950 Min. : 0 Min. : 18.5
## 1st Qu.: 1.0 1st Qu.: 990 1st Qu.: 0 1st Qu.: 62.5
## Median : 9.0 Median : 990 Median : 0 Median : 89.6
## Mean : 9.5 Mean : 990 Mean : 165 Mean : 89.7
## 3rd Qu.: 18.0 3rd Qu.: 994 3rd Qu.: 265 3rd Qu.:116.8
## Max. : 36.0 Max. :1022 Max. :1021 Max. :161.4
## Cloud Type Precipitable Water Wind Direction
## Min. : 0.00 Min. :0.08 Min. : 0
## 1st Qu.: 1.00 1st Qu.:0.84 1st Qu.:101
## Median : 4.00 Median :1.58 Median :200
## Mean : 3.59 Mean :1.87 Mean :188
## 3rd Qu.: 7.00 3rd Qu.:2.66 3rd Qu.:273
## Max. :10.00 Max. :7.12 Max. :360
## Date
## Min. :2000-01-01 00:00:00.00
## 1st Qu.:2005-04-02 05:52:30.00
## Median :2010-07-02 11:45:00.00
## Mean :2010-07-02 14:04:12.55
## 3rd Qu.:2015-10-01 17:37:30.00
## Max. :2020-12-31 23:30:00.00
#names(chicago_full)
# Bin the solar zenith angle into 10-degree intervals spanning 0 to 180.
chicago_full$Zenith_Bins <- cut(
  chicago_full$`Solar Zenith Angle`,
  breaks = seq(0, 180, by = 10)
)
# Plotting irradiance vs zenith angle
# The gray dots are outliers of solar irradiance, likely caused by clouds
ggplot(data = chicago_full, aes(x = Zenith_Bins, y = GHI)) +
  # `linewidth` replaces `size` for line widths (ggplot2 >= 3.4.0).
  geom_boxplot(outlier.color = "gray", linewidth = 0.5) +
  labs(
    x = "Solar Zenith Angle (degrees)",
    y = "Global Horizontal Irradiance (W/m²)",
    title = "Distribution of GHI for Different Solar Zenith Angle (Grouped by bins of 10 degrees)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Plot solar zenith angle against time of day, one line per calendar day.
# - x uses Hour + Minute / 60 so the :00 and :30 readings do not share an
#   x position (which made every line zig-zag vertically).
# - Year is part of the grouping so the same month/day from different years
#   is not joined into a single path.
ggplot(
  data = chicago_full,
  aes(
    x = Hour + Minute / 60,
    y = `Solar Zenith Angle`,
    group = interaction(Year, Month, Day, drop = TRUE),
    color = factor(Month)
  )
) +
  geom_line() +
  labs(
    x = "Time of Day (Hour)",
    y = "Solar Zenith Angle (degrees)",
    title = "Solar Zenith Angle by Time of Day (Spaghetti Plot)"
  ) +
  # month.name is the built-in vector "January", ..., "December".
  scale_color_discrete(name = "Month", labels = month.name) +
  theme_minimal() +
  theme(legend.position = "top", axis.text.x = element_text(angle = 45, hjust = 1))

# Shows the difference between the solstice months (June vs December) and
# their effect on the solar zenith angle.
filtered_data <- chicago_full %>%
  filter(Month %in% c(6, 12))
# One line per calendar day: Year is part of the grouping so different years
# are not joined into one path, and x includes the minutes so the two
# readings within an hour do not share an x position.
ggplot(
  data = filtered_data,
  aes(
    x = Hour + Minute / 60,
    y = `Solar Zenith Angle`,
    group = interaction(Year, Month, Day, drop = TRUE),
    color = factor(Month)
  )
) +
  geom_line() +
  labs(
    x = "Time of Day (Hour)",
    y = "Solar Zenith Angle (degrees)",
    title = "Solar Zenith Angle by Time of Day (June and December Only)"
  ) +
  scale_color_discrete(name = "Month", labels = c("June", "December")) +
  theme_minimal() +
  theme(legend.position = "top", axis.text.x = element_text(angle = 45, hjust = 1))

# Plot how average solar irradiance varies by hour of day within each month.
# Convert Month to a labelled factor only once: re-applying factor() with
# levels 1:12 to an existing factor would turn every value into NA.
if (!is.factor(chicago_full$Month)) {
  chicago_full$Month <- factor(chicago_full$Month, levels = 1:12, labels = month.abb)
}
# Mean GHI for every (Month, Hour) pair; both half-hour readings are pooled.
agg_data <- chicago_full %>%
  group_by(Month, Hour) %>%
  summarise(Avg_GHI = mean(GHI, na.rm = TRUE), .groups = "drop")
# Plot the aggregated means (one line per month) rather than the raw rows,
# so the chart matches its "Average ..." title and axis label.
ggplot(data = agg_data, aes(x = Hour, y = Avg_GHI, group = Month, color = Month)) +
  # `linewidth` replaces `size` for line widths (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1.5, alpha = 0.7) +
  labs(
    x = "Hour of the Day",
    y = "Average Global Horizontal Irradiance (W/m²)",
    title = "Average Solar Irradiance by Hour for Each Month (Spaghetti Plot)"
  ) +
  scale_x_continuous(breaks = seq(0, 23, by = 1)) +
  theme_minimal() +
  theme(legend.position = "top", axis.text.x = element_text(angle = 45, hjust = 1))

# num_unique_clouds <- chicago_full %>%
# distinct(Cloud_Type) %>%
# nrow()
# Give the cloud-type column a syntactic name, then treat it as categorical.
chicago_full <- rename(chicago_full, Cloud_Type = `Cloud Type`)
chicago_full$Cloud_Type <- factor(x = chicago_full$Cloud_Type)
# Mean GHI per cloud type, restricted to rows whose solar zenith angle is
# between 0 and 80 degrees.
ggplot(
  data = subset(chicago_full, `Solar Zenith Angle` >= 0 & `Solar Zenith Angle` <= 80),
  mapping = aes(x = Cloud_Type, y = GHI)
) +
  stat_summary(fun = mean, geom = "bar", fill = "skyblue", colour = "black") +
  labs(
    x = "Cloud_Type",
    y = "Mean Global Horizontal Irradiance (W/m²)",
    title = "Effect of Cloud Type on Irradiance (Solar Zenith: 0-80 degrees)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# List the column names after the rename and the added helper columns.
colnames(chicago_full)
## [1] "Year" "Month" "Day"
## [4] "Hour" "Minute" "Dew Point"
## [7] "Surface Albedo" "Wind Speed" "Relative Humidity"
## [10] "Temperature" "Pressure" "GHI"
## [13] "Solar Zenith Angle" "Cloud_Type" "Precipitable Water"
## [16] "Wind Direction" "Date" "Zenith_Bins"
# Temperature vs irradiance boxplot for rows with solar zenith angle 0-80.
# Temperature is cut into 2-degree bins starting at the subset's minimum.
# include.lowest = TRUE keeps the minimum itself in the first bin; with the
# default right-closed intervals it would otherwise fall into an NA bin.
ggplot(
  data = subset(chicago_full, `Solar Zenith Angle` >= 0 & `Solar Zenith Angle` <= 80),
  aes(
    x = cut(
      Temperature,
      breaks = seq(min(Temperature), max(Temperature) + 2, by = 2),
      include.lowest = TRUE
    ),
    y = GHI
  )
) +
  geom_boxplot() +
  labs(
    x = "Temperature (°C)",
    y = "Global Horizontal Irradiance (W/m²)",
    title = "Temperature vs. Global Horizontal Irradiance (Solar Zenith: 0-80 degrees)"
  ) +
  theme_minimal() +
  # Rotate the interval labels so neighbouring bins stay readable.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Surface albedo vs irradiance for sunlight hours (solar zenith angle 0-80),
# limited to albedo values between 0.05 and 0.30.
filtered_data <- chicago_full %>%
  filter(
    `Solar Zenith Angle` >= 0, `Solar Zenith Angle` <= 80,
    `Surface Albedo` >= 0.05, `Surface Albedo` <= 0.30
  ) %>%
  mutate(
    Albedo_Bin = cut(
      `Surface Albedo`,
      # Round the breaks so that floating-point error accumulated by seq()
      # does not decide which bin a 3-decimal albedo value lands in.
      breaks = round(seq(0.05, 0.30, by = 0.002), 3),
      # Keep an albedo of exactly 0.05 in the first bin instead of NA.
      include.lowest = TRUE
    )
  )
ggplot(data = filtered_data, aes(x = Albedo_Bin, y = GHI)) +
  # `linewidth` replaces `size` for line widths (ggplot2 >= 3.4.0).
  geom_boxplot(outlier.color = "gray", linewidth = 0.5) +
  labs(
    x = "Surface Albedo",
    y = "Global Horizontal Irradiance (W/m²)",
    title = "Distribution of GHI for Different Surface Albedo (Grouped by bins of 0.002)\n(Zenith: 0-80 degrees)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Relative humidity vs solar irradiance for solar zenith angle 0-80.
# Rebuild the subset from chicago_full: the previous `filtered_data` is also
# restricted to surface albedo 0.05-0.30, which this plot's title does not
# state and which would silently drop the high-albedo rows.
filtered_data <- chicago_full %>%
  filter(`Solar Zenith Angle` >= 0, `Solar Zenith Angle` <= 80) %>%
  mutate(
    RelHumidity_Bin = cut(
      `Relative Humidity`,
      breaks = seq(0, 100, by = 2.5),
      # Keep a humidity of exactly 0 in the first bin instead of NA.
      include.lowest = TRUE
    )
  )
ggplot(data = filtered_data, aes(x = RelHumidity_Bin, y = GHI)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(
    x = "Relative Humidity (%)",
    y = "Global Horizontal Irradiance (W/m²)",
    title = "GHI vs. Relative Humidity (Zenith: 0-80 degrees, Bin Size: 2.5%)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Precipitable water vs GHI for sunlight hours (solar zenith angle 0-80).
# Rebuild the subset from chicago_full so the plot is not silently limited to
# the 0.05-0.30 surface-albedo range applied for the albedo plot.
filtered_data <- chicago_full %>%
  filter(`Solar Zenith Angle` >= 0, `Solar Zenith Angle` <= 80) %>%
  mutate(
    PrecipitableWater_Bin = cut(
      `Precipitable Water`,
      # Extend the breaks one bin past the maximum so the largest values are
      # covered, and include the minimum in the first bin; otherwise both
      # ends of the range fall into an NA bin.
      breaks = seq(
        min(`Precipitable Water`, na.rm = TRUE),
        max(`Precipitable Water`, na.rm = TRUE) + 0.15,
        by = 0.15
      ),
      include.lowest = TRUE
    )
  )
ggplot(data = filtered_data, aes(x = PrecipitableWater_Bin, y = GHI)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(
    # The label now states the bin width actually used by cut() above.
    x = "Precipitable Water (Bin Size: 0.15)",
    y = "Global Horizontal Irradiance (W/m²)",
    title = "GHI vs. Precipitable Water (Zenith: 0-80 degrees)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
